1 Global Parameters

###############################
#
#define settings
#
###############################

GET_SAMPLE_NAMES <- TRUE # if set to FALSE define it in the next line
#sample_names_user <- c()

unique <- params$unique  # how many peptides should be at least necessary to identify a protein


# which types should be evaluated TRUE or FALSE could be used
RAZOR <- TRUE
UNIQUE <- TRUE
LFQ <- FALSE

# define path of the proteinGroups.txt file(s)
path = params$path

2 Introduction

2.1 Unique | Razor&Unique procedure

Filtering:

  • Potential.contaminant != "+" -> remove where contaminants

  • Reverse != "+"-> remove where reverse = +

  • Only.identified.by.site != "+" -> remove where only identified by site = +

  • data[name] >= unique -> keep only identifications which have greater or equal number (defined via variable unique) of razor/unique peptides | typically this is set to 1 or 2

Data analysis show the number of identified proteins and the heatmap. In Summary the final results are visualized as boxplot and a table with the summarized values is shown.

2.2 LFQ-Procedure

Filtering:

  • Potential.contaminant != "+" -> remove where contaminants = +
  • Reverse != "+"-> remove where reverse = +
  • Only.identified.by.site != "+" -> remove where only identified by site = +

Then the log2of the LFQ intensities is calculated and all -Inf values were substituted by NA using the following command: lfq[lfq == -Inf] <- NA

Data analysis show the number of identified proteins and the heatmap. In Summary the final results are visualized as boxplot and a table with the summarized values is shown.

3 Load required packages and functions

# load libraries
library(tidyverse)
library(dplyr)
library(pheatmap)
library(ggplot2)
library(reshape2)
library(knitr)
library(plotly)
# load functions
source("filtering_function.R")
print("loading sucessful")
## [1] "loading sucessful"

4 Data Analysis

The txt-files of the defined folder are read in, processed and filtered automatically. This section shows the code, heatmaps and also some summary statistics.

infile <- read.csv(path, dec=".", sep="\t")
res_raw <- list(infile)
########## get sample names
if (GET_SAMPLE_NAMES == TRUE){
  #get sample names
  tmp <- colnames(res_raw[[1]])
  tmp <- tmp[grep("Razor...unique.peptides.",tmp)]
  tmp <- sub(".*Razor...unique.peptides.", "",tmp)
  sample_names_raw <- tmp
  rm(tmp)
  #create sample names
  sample_names_razor <- paste("Razor...unique.peptides.",sample_names_raw, sep="")
  sample_names_unique <- paste("Unique.peptides.",sample_names_raw, sep="")
  sample_names_lfq <- paste("LFQ.intensity.",sample_names_raw, sep="")
} else {
    res_raw[[1]] <- res_raw[[1]] %>%
       rename_with(~ paste("Razor_", sample_names_user, sep = ""),
                   starts_with("Razor...unique.peptides.")) %>%
       rename_with(~ paste("Unique_", sample_names_user, sep = ""),
                   starts_with("Unique.peptides.")) %>%
       rename_with(~ paste("LFQ_", sample_names_user, sep = ""),
                   starts_with("LFQ.intensity."))
    # get column names
    sample_names_razor <- colnames(res_raw[[i]][grep("Razor_",
                                                colnames(res_raw[[i]]))])
    sample_names_unique <- colnames(res_raw[[i]][grep("Unique_",
                                                colnames(res_raw[[i]]))])
    sample_names_lfq <- colnames(res_raw[[i]][grep("LFQ_",
                                                 colnames(res_raw[[i]]))])
}

########## create result data frame & result lists
# create data frame for razor
results_2gether_razor <- data.frame(matrix(ncol = length(sample_names_razor), nrow = 1))
colnames(results_2gether_razor) <- sample_names_razor
#create data frame for unique
results_2gether_unique <- data.frame(matrix(ncol = length(sample_names_unique), nrow = 1))
colnames(results_2gether_unique) <- sample_names_unique
#create data frame for lfq
results_2gether_lfq <- data.frame(matrix(ncol = length(sample_names_lfq), nrow = 1))
colnames(results_2gether_lfq) <- sample_names_lfq

#results lists
results_razor <- list()
results_unique <- list()
results_lfq <- list()

########## perform evaluation
if (RAZOR == TRUE) {
  cat("\n")
  cat("##","Razor","\n")
  cat("\n")
    data_raw <- as.data.frame(res_raw[[1]])
    data <- filtering(data_raw)
    fasta <- data$Fasta.headers
    results <- data.frame(matrix(ncol = length(sample_names_razor), nrow = nrow(data)))
    colnames(results) <- sample_names_razor
    i <- 0
    for (i in 1:length(sample_names_razor)){
      name <- sample_names_razor[i]
      tmp <- (data[name] >= unique)
      results[i] <- tmp
      cat(name,"<br/>",sum(tmp, na.rm = TRUE),"<br/>")
    }
    results_2gether_razor[1,] <- sapply(results,function(x)sum(x, na.rm = TRUE))
    results$FASTA <- data$Fasta.headers
    results_razor[[1]] <- results
    cat("\n")
    
        #create heatmaps
    paletteLength <- 2
    myColor <- colorRampPalette(c("navy", "white",
                                  "firebrick3"))(paletteLength)
      data4pheatmap <- results[,-ncol(results)]
      data4pheatmap <- data4pheatmap*1
      data4pheatmap[data4pheatmap == 0] <- NA
      # remove NA rows
      ind <- apply(data4pheatmap, 1, function(x) all(is.na(x)))
      data4pheatmap_clear <- data4pheatmap[!ind,]
      data4pheatmap_clear[is.na(data4pheatmap_clear)] <- 0
      if (ncol(data4pheatmap_clear) < 2){
        cat("\n")
        cat("##"," No heatmap possible. Too less columns in the result
            table.","\n")
      } else {
        pheatmap(data4pheatmap_clear,
                 legend_breaks = c(0,1),
                 color = myColor,
                 treeheight_row = 10,
                 angle_col ="45",
                 treeheight_col = 10,
                 legend = TRUE,
                 labels_row = rep("",nrow(data4pheatmap)),
                 labels_col = sample_names_raw)
        cat("\n")
      }
  
}

4.1 Razor

Razor…unique.peptides.E4_D2_2_Gesamt_01
16
Razor…unique.peptides.E4_D2_2_Gesamt_02
14
Razor…unique.peptides.E4_D2_3_Gesamt_01
20
Razor…unique.peptides.E4_D2_3_Gesamt_02
21
Razor…unique.peptides.E4_D2_Pool_20_B1_01
2
Razor…unique.peptides.E4_D2_Pool_20_B1_02
4
Razor…unique.peptides.E4_D2_Pool_20_B10_01
5
Razor…unique.peptides.E4_D2_Pool_20_B10_02
5
Razor…unique.peptides.E4_D2_Pool_20_B2_01
2
Razor…unique.peptides.E4_D2_Pool_20_B2_02
3
Razor…unique.peptides.E4_D2_Pool_20_B3_01
5
Razor…unique.peptides.E4_D2_Pool_20_B3_02
4
Razor…unique.peptides.E4_D2_Pool_20_B4_01
8
Razor…unique.peptides.E4_D2_Pool_20_B4_02
6
Razor…unique.peptides.E4_D2_Pool_20_B5_01
11
Razor…unique.peptides.E4_D2_Pool_20_B5_02
10
Razor…unique.peptides.E4_D2_Pool_20_B6_01
17
Razor…unique.peptides.E4_D2_Pool_20_B6_02
18
Razor…unique.peptides.E4_D2_Pool_20_B7_01
15
Razor…unique.peptides.E4_D2_Pool_20_B7_02
15
Razor…unique.peptides.E4_D2_Pool_20_B8_01
10
Razor…unique.peptides.E4_D2_Pool_20_B8_02
8
Razor…unique.peptides.E4_D2_Pool_20_B9_01
6
Razor…unique.peptides.E4_D2_Pool_20_B9_02
7

if (UNIQUE == TRUE) {
  cat("\n")
  cat("##","Unique","\n")
  cat("\n")
    data_raw <- as.data.frame(res_raw[[1]])
    data <- filtering(data_raw)
    results <- data.frame(matrix(ncol = length(sample_names_unique), nrow = nrow(data)))
    colnames(results) <- sample_names_unique
    i <- 0
    for (i in 1:length(sample_names_unique)){
      name <- sample_names_unique[i]
      tmp <- (data[name] >= unique)
      results[i] <- tmp
      cat(name,"<br/>",sum(tmp, na.rm = TRUE),"<br/>")
    }
    results_2gether_unique[1,] <- sapply(results,function(x)sum(x, na.rm = TRUE))
    results$FASTA <- data$Fasta.headers
    results_unique[[1]] <- results
    cat("\n")
    #create heatmaps
    paletteLength <- 2
    myColor <- colorRampPalette(c("navy", "white",
                                  "firebrick3"))(paletteLength)
      data4pheatmap <- results[,-ncol(results)]
      data4pheatmap <- data4pheatmap*1
      data4pheatmap[data4pheatmap == 0] <- NA
      # remove NA rows
      ind <- apply(data4pheatmap, 1, function(x) all(is.na(x)))
      data4pheatmap_clear <- data4pheatmap[!ind,]
      data4pheatmap_clear[is.na(data4pheatmap_clear)] <- 0
      if (ncol(data4pheatmap) < 2){
        cat("\n")
        cat("##"," No heatmap possible. Too less columns in the result
            table.","\n")
      } else {
        pheatmap(data4pheatmap_clear,
                 legend_breaks = c(0,1),
                 color = myColor,
                 treeheight_row = 10,
                 angle_col ="45",
                 treeheight_col = 10,
                 legend = TRUE,
                 labels_row = rep("",nrow(data4pheatmap)),
                 labels_col = sample_names_raw)
        cat("\n")
      }
}

4.2 Unique

Unique.peptides.E4_D2_2_Gesamt_01
11
Unique.peptides.E4_D2_2_Gesamt_02
9
Unique.peptides.E4_D2_3_Gesamt_01
14
Unique.peptides.E4_D2_3_Gesamt_02
15
Unique.peptides.E4_D2_Pool_20_B1_01
0
Unique.peptides.E4_D2_Pool_20_B1_02
1
Unique.peptides.E4_D2_Pool_20_B10_01
3
Unique.peptides.E4_D2_Pool_20_B10_02
2
Unique.peptides.E4_D2_Pool_20_B2_01
1
Unique.peptides.E4_D2_Pool_20_B2_02
1
Unique.peptides.E4_D2_Pool_20_B3_01
2
Unique.peptides.E4_D2_Pool_20_B3_02
2
Unique.peptides.E4_D2_Pool_20_B4_01
4
Unique.peptides.E4_D2_Pool_20_B4_02
2
Unique.peptides.E4_D2_Pool_20_B5_01
6
Unique.peptides.E4_D2_Pool_20_B5_02
6
Unique.peptides.E4_D2_Pool_20_B6_01
12
Unique.peptides.E4_D2_Pool_20_B6_02
13
Unique.peptides.E4_D2_Pool_20_B7_01
9
Unique.peptides.E4_D2_Pool_20_B7_02
9
Unique.peptides.E4_D2_Pool_20_B8_01
7
Unique.peptides.E4_D2_Pool_20_B8_02
5
Unique.peptides.E4_D2_Pool_20_B9_01
4
Unique.peptides.E4_D2_Pool_20_B9_02
5

if (LFQ == TRUE) {
  cat("\n")
  cat("##","LFQ","\n")
  cat("\n")
    data_raw <- as.data.frame(res_raw[[1]])
    data <- filtering(data_raw)
    
    results <- data.frame(matrix(ncol = length(sample_names_lfq), nrow = nrow(data)))
    colnames(results) <- sample_names_lfq
    i <- 0
    for (i in 1:length(sample_names_lfq)){
      name <- sample_names_lfq[i]
      tmp <- log2(data[name])
      tmp[tmp == -Inf] <- NA
      results[i] <- tmp
      cat(name,"<br/>",sum(!is.na(tmp)),"<br/>")
      
    }
    

    results_2gether_lfq[1,] <- sapply(results,function(x)sum(!is.na(x)))
    results$FASTA <- data$Fasta.headers
    results_lfq[[1]] <- results
    
    cat("\n")
    
    #create heatmaps
    paletteLength <- 50
    myColor <- colorRampPalette(c("navy", "white",
                                  "firebrick3"))(paletteLength)
      data4pheatmap <- results[,-ncol(results)]
      data4pheatmap <- data4pheatmap*1
      data4pheatmap[data4pheatmap == 0] <- NA
      # remove NA rows
      ind <- apply(data4pheatmap, 1, function(x) all(is.na(x)))
      data4pheatmap_clear <- data4pheatmap[!ind,]
      data4pheatmap_clear[is.na(data4pheatmap_clear)] <- 0
      if (ncol(data4pheatmap) < 2){
        cat("\n")
        cat("##"," No heatmap possible. Too less columns in the result
            table.","\n")
      } else {
        pheatmap(data4pheatmap_clear,
                 color = myColor,
                 treeheight_row = 10,
                 angle_col ="45",
                 treeheight_col = 10,
                 legend = TRUE,
                 labels_row = rep("",nrow(data4pheatmap)),
                 labels_col = sample_names_raw)
        cat("\n")
      }
}

5 Summary

# show summary
if (RAZOR == TRUE){
  cat("\n")
  cat("##"," Summary Razor","\n")
  print(kable(results_2gether_razor))
  
  ggplot(melt(results_2gether_razor),aes(x = variable, y = value)) +
  geom_boxplot() + 
  xlab("Sample name") + 
  scale_x_discrete(labels = sample_names_raw) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Razor")
}

5.1 Summary Razor

Razor…unique.peptides.E4_D2_2_Gesamt_01 Razor…unique.peptides.E4_D2_2_Gesamt_02 Razor…unique.peptides.E4_D2_3_Gesamt_01 Razor…unique.peptides.E4_D2_3_Gesamt_02 Razor…unique.peptides.E4_D2_Pool_20_B1_01 Razor…unique.peptides.E4_D2_Pool_20_B1_02 Razor…unique.peptides.E4_D2_Pool_20_B10_01 Razor…unique.peptides.E4_D2_Pool_20_B10_02 Razor…unique.peptides.E4_D2_Pool_20_B2_01 Razor…unique.peptides.E4_D2_Pool_20_B2_02 Razor…unique.peptides.E4_D2_Pool_20_B3_01 Razor…unique.peptides.E4_D2_Pool_20_B3_02 Razor…unique.peptides.E4_D2_Pool_20_B4_01 Razor…unique.peptides.E4_D2_Pool_20_B4_02 Razor…unique.peptides.E4_D2_Pool_20_B5_01 Razor…unique.peptides.E4_D2_Pool_20_B5_02 Razor…unique.peptides.E4_D2_Pool_20_B6_01 Razor…unique.peptides.E4_D2_Pool_20_B6_02 Razor…unique.peptides.E4_D2_Pool_20_B7_01 Razor…unique.peptides.E4_D2_Pool_20_B7_02 Razor…unique.peptides.E4_D2_Pool_20_B8_01 Razor…unique.peptides.E4_D2_Pool_20_B8_02 Razor…unique.peptides.E4_D2_Pool_20_B9_01 Razor…unique.peptides.E4_D2_Pool_20_B9_02
16 14 20 21 2 4 5 5 2 3 5 4 8 6 11 10 17 18 15 15 10 8 6 7

if (UNIQUE == TRUE){
  cat("\n")
  cat("##"," Summary Unique","\n")
  print(kable(results_2gether_unique))
  
  ggplot(melt(results_2gether_unique),aes(x = variable, y = value)) +
  geom_boxplot() + 
  xlab("Sample name") + 
  scale_x_discrete(labels = sample_names_raw) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Unique")
}

5.2 Summary Unique

Unique.peptides.E4_D2_2_Gesamt_01 Unique.peptides.E4_D2_2_Gesamt_02 Unique.peptides.E4_D2_3_Gesamt_01 Unique.peptides.E4_D2_3_Gesamt_02 Unique.peptides.E4_D2_Pool_20_B1_01 Unique.peptides.E4_D2_Pool_20_B1_02 Unique.peptides.E4_D2_Pool_20_B10_01 Unique.peptides.E4_D2_Pool_20_B10_02 Unique.peptides.E4_D2_Pool_20_B2_01 Unique.peptides.E4_D2_Pool_20_B2_02 Unique.peptides.E4_D2_Pool_20_B3_01 Unique.peptides.E4_D2_Pool_20_B3_02 Unique.peptides.E4_D2_Pool_20_B4_01 Unique.peptides.E4_D2_Pool_20_B4_02 Unique.peptides.E4_D2_Pool_20_B5_01 Unique.peptides.E4_D2_Pool_20_B5_02 Unique.peptides.E4_D2_Pool_20_B6_01 Unique.peptides.E4_D2_Pool_20_B6_02 Unique.peptides.E4_D2_Pool_20_B7_01 Unique.peptides.E4_D2_Pool_20_B7_02 Unique.peptides.E4_D2_Pool_20_B8_01 Unique.peptides.E4_D2_Pool_20_B8_02 Unique.peptides.E4_D2_Pool_20_B9_01 Unique.peptides.E4_D2_Pool_20_B9_02
11 9 14 15 0 1 3 2 1 1 2 2 4 2 6 6 12 13 9 9 7 5 4 5

if (LFQ == TRUE){
  cat("\n")
  cat("##"," Summary LFQ","\n")
  print(kable(results_2gether_lfq))
  
  ggplot(melt(results_2gether_lfq),aes(x = variable, y = value)) +
  geom_boxplot() + 
  xlab("Sample name") + 
  scale_x_discrete(labels = sample_names_raw) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "LFQ")
}

# save results
results_final <- data.frame(results_2gether_razor,results_2gether_unique,results_2gether_lfq)
# get rid of NA columns
results_final <- results_final[,colSums(is.na(results_final))<nrow(results_final)]
write.csv2(results_final, file = paste(params$pathRDS,"_summary.csv",sep="")) 
saveRDS(results_razor, file = paste(params$pathRDS,"_Razor.RDS",sep=""))
saveRDS(results_unique, file = paste(params$pathRDS,"_Unique.RDS",sep=""))
saveRDS(results_lfq, file = paste(params$pathRDS,"_LFQ.RDS",sep=""))

6 Which proteins are in the extracts?

  • used Razor peptides and at least 2 peptides for identification
p <-  ggplot(melt(results_2gether_razor),aes(x = variable, y = value)) +
  geom_boxplot() + 
  xlab("Sample name") + 
  scale_x_discrete(labels = sample_names_raw) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Razor")
ggplotly(p)
dat <- results_razor[[1]]
yeasx <- grep("YEASX",dat$FASTA)
wheat <- grep("WHEAT",dat$FASTA)
# write human oder trica in a column
for (i in 1:nrow(dat)){
  if (grepl("YEASX",dat$FASTA[i])){
    dat$org[i] <- "YEASX"
  }
  else if (grepl("WHEAT",dat$FASTA[i])){
    dat$org[i] <- "WHEAT"
  }
  else {
    dat$org[i] <- NA
  }
}

dat$org[intersect(yeasx,wheat)] <- "YEASX/WHEAT"

dat4plot <- dat[,-c(ncol(dat)-1)]
dat4plot_long <- melt(dat4plot, id = c("org"))
tmp <- regmatches(dat4plot_long$variable,gregexpr("(?<=des.).*",dat4plot_long$variable,perl=TRUE))
tmp <- unlist(tmp)
dat4plot_long$variable <- tmp

p <- ggplot(data=dat4plot_long, aes(x=variable, y=as.numeric(value), fill=org)) +
  stat_summary(fun.y = sum, geom = "bar", position = "dodge") +
  xlab("sample name") +
  ylab("counts") +
  theme(axis.text.x = element_text(angle = 90))
ggplotly(p)
for (i in 1:(ncol(dat)-2)){
  cat("\n")
  text <- paste(" Fasta Headers - Sample",sample_names_raw[i])
  cat("##",text,"\n")
  cat("\"n")
  print(dat$fasta[which(dat[,i])])
}

6.1 Fasta Headers - Sample E4_D2_2_Gesamt_01

“nNULL

6.2 Fasta Headers - Sample E4_D2_2_Gesamt_02

“nNULL

6.3 Fasta Headers - Sample E4_D2_3_Gesamt_01

“nNULL

6.4 Fasta Headers - Sample E4_D2_3_Gesamt_02

“nNULL

6.5 Fasta Headers - Sample E4_D2_Pool_20_B1_01

“nNULL

6.6 Fasta Headers - Sample E4_D2_Pool_20_B1_02

“nNULL

6.7 Fasta Headers - Sample E4_D2_Pool_20_B10_01

“nNULL

6.8 Fasta Headers - Sample E4_D2_Pool_20_B10_02

“nNULL

6.9 Fasta Headers - Sample E4_D2_Pool_20_B2_01

“nNULL

6.10 Fasta Headers - Sample E4_D2_Pool_20_B2_02

“nNULL

6.11 Fasta Headers - Sample E4_D2_Pool_20_B3_01

“nNULL

6.12 Fasta Headers - Sample E4_D2_Pool_20_B3_02

“nNULL

6.13 Fasta Headers - Sample E4_D2_Pool_20_B4_01

“nNULL

6.14 Fasta Headers - Sample E4_D2_Pool_20_B4_02

“nNULL

6.15 Fasta Headers - Sample E4_D2_Pool_20_B5_01

“nNULL

6.16 Fasta Headers - Sample E4_D2_Pool_20_B5_02

“nNULL

6.17 Fasta Headers - Sample E4_D2_Pool_20_B6_01

“nNULL

6.18 Fasta Headers - Sample E4_D2_Pool_20_B6_02

“nNULL

6.19 Fasta Headers - Sample E4_D2_Pool_20_B7_01

“nNULL

6.20 Fasta Headers - Sample E4_D2_Pool_20_B7_02

“nNULL

6.21 Fasta Headers - Sample E4_D2_Pool_20_B8_01

“nNULL

6.22 Fasta Headers - Sample E4_D2_Pool_20_B8_02

“nNULL

6.23 Fasta Headers - Sample E4_D2_Pool_20_B9_01

“nNULL

6.24 Fasta Headers - Sample E4_D2_Pool_20_B9_02

“nNULL

7 Used time

## [1] "Time used for analysis: 0.6 minutes"